package net.bwie.flink.utils;

import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Tokenizer utilities built on top of the IK Analyzer segmenter.
 *
 * @author xuanyu
 * @date 2023/8/29
 */
public class AnalyzerUtil {

	/**
	 * Splits the given text into tokens using IK Analyzer.
	 *
	 * <p>The segmenter is created with its second constructor argument
	 * ({@code useSmart}) set to {@code true}.
	 *
	 * @param content the text to segment; {@code null} is treated as "no text"
	 * @return a new mutable list holding the text of every lexeme in the order the
	 *         segmenter produced them; empty when {@code content} is {@code null}
	 * @throws Exception if the underlying segmenter fails while reading the text
	 */
	public static List<String> ikAnalyzer(String content) throws Exception {
		List<String> tokens = new ArrayList<>();
		// StringReader rejects null with a bare NullPointerException; a missing
		// text simply has no tokens, so answer with an empty list instead.
		if (content == null) {
			return tokens;
		}
		IKSegmenter segmenter = new IKSegmenter(new StringReader(content), true);
		// next() hands back one lexeme per call and null once the input is exhausted.
		for (Lexeme lexeme = segmenter.next(); lexeme != null; lexeme = segmenter.next()) {
			tokens.add(lexeme.getLexemeText());
		}
		return tokens;
	}

	/**
	 * Manual smoke test: segments a sample sentence and prints the resulting tokens.
	 *
	 * @param args unused
	 * @throws Exception if segmentation fails
	 */
	public static void main(String[] args) throws Exception {
		List<String> tokens = ikAnalyzer("我 苹果IPhoneX手机 的 了");
		System.out.println(Arrays.toString(tokens.toArray()));
	}

}
